## Registered S3 method overwritten by 'treeio':
## method from
## root.phylo ape
## This is MicrobeR 0.3.2. See https://jbisanz.github.io/MicrobeR/ for usage information.
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-6
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:ape':
##
## rotate
## The following object is masked from 'package:MicrobeR':
##
## mean_sd
SVtab<-read_qza("/Volumes/turnbaughlab/qb3share/qiyanang/IDEO_manuscript_Rd2/data/gnoto_updated_pipeline_outputs/ASV_table.qza")$data %>% as.data.frame()
SVseq<-read_qza("/Volumes/turnbaughlab/qb3share/qiyanang/IDEO_manuscript_Rd2/data/gnoto_updated_pipeline_outputs/ASV_sequences.qza")$data %>% as.data.frame() %>% rename(c("SV"="x"))
taxonomy<-read.delim("/Volumes/turnbaughlab/qb3share/qiyanang/IDEO_manuscript_Rd2/data/gnoto_updated_pipeline_outputs/ASV_d2taxonomy.txt", header=T)
lookup<-(SVseq %>% rownames_to_column("ASV")) %>%
left_join(taxonomy, by="ASV") %>%
column_to_rownames("ASV")
## Warning: Column `ASV` joining character vector and factor, coercing into
## character vector
#Here I am parsing out the different experiments. I use "IDEOmeta4" as the major variable for the bulk of the analysis
IDEOmeta <- read.csv("/Volumes/turnbaughlab/qb3share/qiyanang/IDEO_manuscript_Rd2/data/gnoto_mouse_metadata.csv", header = T)
IDEOmeta <- filter(IDEOmeta, !is.na(SequencingID))
IDEOmeta<- filter(IDEOmeta, Organism=="Mouse")
rownames(IDEOmeta) <- IDEOmeta$SequencingID
IDEOmeta1 <- filter(IDEOmeta, Experiment == "IDEO1")
rownames(IDEOmeta1) <- IDEOmeta1$SequencingID
IDEOmeta2 <- filter(IDEOmeta, Experiment == "IDEO2")
rownames(IDEOmeta2) <- IDEOmeta2$SequencingID
IDEOmeta4 <- IDEOmeta
rownames(IDEOmeta4) <- IDEOmeta4$SequencingID
IDEO124SVtab <- SVtab[,rownames(IDEOmeta)]
IDEO1SVtab <- SVtab[,rownames(IDEOmeta1)]
IDEO2SVtab <- SVtab[,rownames(IDEOmeta2)]
IDEO4SVtab <- SVtab[,rownames(IDEOmeta4)]
#SV tree
SVtree_denovo <- read_qza("/Volumes/turnbaughlab/qb3share/qiyanang/IDEO_manuscript_Rd2/data/gnoto_updated_pipeline_outputs/ASV_denovotree.qza")
# SVtree_denovo$data<-SVtree_denovo$data %>%
# ape::drop.tip(., taxonomy %>% filter(is.na(Phylum)) %>% pull(ASV)) %>%
# ape::drop.tip(., "80e52422048b1f819347739465288380")
#Setting the color palette choice and statistical comparisons for the ggpubr package.
Whitecolor="#E69F00"
Chinesecolor="#0072B2"
statgroups <- list(c("Chinese", "White"))
# theme for pcoas
theme_pcoa<- function () {
theme_classic(base_size=10, base_family="Helvetica") +
theme(axis.text = element_text(size=8, color = "black"),
axis.title = element_text(size=10, color="black"),
legend.text = element_text(size=8, color = "black"),
legend.title = element_text(size=10, color = "black"),
plot.title = element_text(size=10, color="black")) +
theme(panel.border = element_rect(color="black", size=1, fill=NA))
}
#IDEO4 QC analysis
#Lookinat at the number of sequences per sample
histogram(colSums(IDEO4SVtab))
#Removing 0 sum rows; eliminates around half of the ASVs
IDEO4SVtab <- IDEO4SVtab[rowSums(IDEO124SVtab)>0,]
#Checking tree tips
plot(SVtree_denovo$data)
#SVtree_denovo
IDEO4tree <- drop.tip(SVtree_denovo$data,SVtree_denovo$data$tip.label[!SVtree_denovo$data$tip.label %in% rownames(IDEO4SVtab)])
#IDEO4tree
IDEO4trees <- list()
IDEO4trees$Raw <- IDEO4tree
#Diversity
#Filter and process ASV table for one version of comparing between donor and recipients
ASVs <- list()
ASVs$Raw <- IDEO4SVtab
ASVs$Filtered <- Confidence.Filter(ASVs$Raw, 3, 10, TRUE)
## Filtering features such that they are present in at least 3 samples with a total of at least 10 reads.
## ...There are 1450908 reads and 698 features
## ...After filtering there are 1414695 reads and 389 features
ASVs$CZM <- zCompositions::cmultRepl(t(ASVs$Filtered),
label = 0,
method = "CZM",
output = "p-counts",
suppress.print = TRUE) %>%
t()
ASVs$CLR <- apply(log2(ASVs$CZM), 2, function(x)x-mean(x))
ASVs$Subsampled <- Subsample.Table(ASVs$Filtered, VERBOSE = T)
## Subsampling feature table to 6437 , currently has 389 taxa.
## ...sampled to 6437 reads with 388 taxa
#Beta diversity and ordination
#Drop tips for the filtered filtered and subsampled trees
IDEO4trees$Filtered <- drop.tip(IDEO4trees$Raw, tip = IDEO4trees$Raw$tip.label[!IDEO4trees$Raw$tip.label %in% rownames(ASVs$Filtered)])
IDEO4trees$Subsampled <- drop.tip(IDEO4trees$Raw, tip = IDEO4trees$Raw$tip.label[!IDEO4trees$Raw$tip.label %in% rownames(ASVs$Subsampled)])
#Create distance matrix list
DM <- list()
DM$Subsampled <- list()
DM$Filtered <- list()
DM$CZM <- list()
#Calculate the Distance Matrices and put them into the list
DM$Subsampled[["Subsampled Euclidean"]] <- dist(t(ASVs$Subsampled), method = "euclidean"); gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 4698994 251.0 7949214 424.6 NA 7949214 424.6
## Vcells 8106291 61.9 14786712 112.9 16384 12143152 92.7
DM$Subsampled[["Subsampled CLR Euclidean"]] <- dist(t(ASVs$CLR), method = "euclidean"); gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 4699120 251.0 7949214 424.6 NA 7949214 424.6
## Vcells 8107493 61.9 14786712 112.9 16384 12143152 92.7
DM$Subsampled[["Subsampled Bray Curtis"]] <- vegdist(t(Make.Proportion(ASVs$Subsampled)), method = "bray"); gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 4699244 251.0 7949214 424.6 NA 7949214 424.6
## Vcells 8108707 61.9 14786712 112.9 16384 12143152 92.7
DM$Subsampled[["Jensen-Shannon Divergence"]] <- phyloseq::distance(phyloseq(otu_table(Make.Proportion(ASVs$Subsampled), taxa_are_rows = T)), method = "jsd", parallel = T); gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 4712077 251.7 7949214 424.6 NA 7949214 424.6
## Vcells 8133342 62.1 14786712 112.9 16384 13888398 106.0
#DM$Subsampled[["weighted Unifrac"]] <- UniFrac(phyloseq(otu_table(Make.Proportion(ASVs$Subsampled), taxa_are_rows = T), phy=IDEO4trees$Subsampled), weighted = T, parallel = T); gc()
#DM$Subsampled[["Unweighted Unifrac"]] <- UniFrac(phyloseq(otu_table(Make.Proportion(ASVs$Subsampled), taxa_are_rows = T), phy=IDEO4trees$Subsampled), weighted = F, parallel = T); gc()
#Evaluate the distance distribution
allDMs <-
lapply(names(DM$Subsampled), function(dname){
DM$Subsampled[[dname]] %>%
as.matrix() %>%
as.data.frame() %>%
rownames_to_column("Subject") %>%
gather(-Subject, key = "Match", value=Distance) %>%
mutate(Metric=dname)
}) %>%
do.call(bind_rows, .)
allDMs %>%
mutate(Metric=factor(Metric, levels=c("Subsampled Euclidean", "Subsampled CLR Euclidean", "Subsampled Bray Curtis", "Jensen-Shannon Divergence", "weighted Unifrac", "Unweighted Unifrac"))) %>%
ggplot(aes(x=Distance, color=Metric)) +
geom_freqpoly() +
theme_MicrobeR() +
theme(legend.position = "none") +
facet_wrap(~Metric, scales = "free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Genearting a PhilR Transformation to match the human data Qi Yan is generating
IDEO4trees$PhilR <- IDEO4trees$Filtered
IDEO4trees$PhilR <- makeNodeLabel(IDEO4trees$PhilR, method="number", prefix = "n")
ASVs$PhilR <- t(philr(t(ASVs$CZM), IDEO4trees$PhilR, part.weights = 'enorm.x.gm.counts', ilr.weights = 'blw.sqrt'))
## Building Sequential Binary Partition from Tree...
## Building Contrast Matrix...
## Transforming the Data...
## Calculating ILR Weights...
## Warning in calculate.blw(tree, method = "sum.children"): Note: a total of 19
## tip edges with zero length have been replaced with a small pseudocount of the
## minimum non-zero edge length ( 5e-09 ).
#It is not subsampled, but putting it into the subsampled list will prevent additional modification to the code
DM$Subsampled[["PhilR Euclidean"]] <- dist(t(ASVs$PhilR), method = "euclidean") ; gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 4816330 257.3 7949214 424.6 NA 7949214 424.6
## Vcells 8378141 64.0 14786712 112.9 16384 13888398 106.0
## Subsampled Euclidean-Subsampled
## Subsampled CLR Euclidean-Subsampled
## Subsampled Bray Curtis-Subsampled
## Jensen-Shannon Divergence-Subsampled
## PhilR Euclidean-Subsampled
## Warning: Expected 2 pieces. Additional pieces discarded in 10 rows [31, 32, 33,
## 34, 35, 36, 37, 38, 39, 40].
## Warning: Expected 2 pieces. Additional pieces discarded in 10 rows [31, 32, 33,
## 34, 35, 36, 37, 38, 39, 40].
## Joining, by = "SequencingID"
## Warning: Column `SequencingID` joining character vector and factor, coercing
## into character vector
## Warning: Expected 2 pieces. Additional pieces discarded in 44 rows [133, 134,
## 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
## 151, 152, ...].
## Joining, by = "SequencingID"
## Warning: Column `SequencingID` joining character vector and factor, coercing
## into character vector
## Warning: Expected 2 pieces. Additional pieces discarded in 44 rows [133, 134,
## 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
## 151, 152, ...].
#Adonis for significance
adonis(DM$Subsampled$`PhilR Euclidean`~Diet*Ethnicity, data = IDEOmeta4, permutations = 999)
##
## Call:
## adonis(formula = DM$Subsampled$`PhilR Euclidean` ~ Diet * Ethnicity, data = IDEOmeta4, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Diet 1 2849.0 2848.95 19.3274 0.29014 0.001 ***
## Ethnicity 1 614.2 614.16 4.1665 0.06255 0.007 **
## Diet:Ethnicity 1 459.9 459.89 3.1199 0.04684 0.016 *
## Residuals 40 5896.2 147.40 0.60048
## Total 43 9819.2 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Adjusting for donor with multiple recipient mice
perm.all <- how(nperm = 999)
setBlocks(perm.all) <- with(IDEOmeta4, SampleID)
adonis(DM$Subsampled$`PhilR Euclidean`~Diet*Ethnicity, data = IDEOmeta4, permutations = perm.all)
##
## Call:
## adonis(formula = DM$Subsampled$`PhilR Euclidean` ~ Diet * Ethnicity, data = IDEOmeta4, permutations = perm.all)
##
## Blocks: with(IDEOmeta4, SampleID)
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Diet 1 2849.0 2848.95 19.3274 0.29014 0.001 ***
## Ethnicity 1 614.2 614.16 4.1665 0.06255 0.001 ***
## Diet:Ethnicity 1 459.9 459.89 3.1199 0.04684 0.068 .
## Residuals 40 5896.2 147.40 0.60048
## Total 43 9819.2 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
patches + plot_annotation(tag_levels = 'A')
#ggsave("/Volumes/turnbaughlab/qb3share/qiyanang/IDEO_manuscript_Rd2/figures/gnoto/Fig_S9.pdf", height=3.5, width=5.8, useDingbats=F)